{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "from torch.autograd import Variable\n",
    "from torch import nn, optim\n",
    "from torch.nn import init"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "---\n",
    "\n",
    "## 1. 标准RNN"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build a simple recurrent network: 3 input features, 5 hidden units, 2 stacked layers.\n",
    "basic_rnn = nn.RNN(\n",
    "    input_size=3,\n",
    "    hidden_size=5,\n",
    "    num_layers=2,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Parameter containing:\n",
       " 0.0763 -0.0092 -0.1030\n",
       "-0.1201 -0.2317  0.3021\n",
       "-0.1568 -0.0355  0.3281\n",
       "-0.3040 -0.1278  0.2782\n",
       "-0.2930 -0.1496  0.2063\n",
       "[torch.FloatTensor of size 5x3]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# First-layer input-to-hidden weights $w_{ih}$; the size is (hidden_size, input_size) = (5, 3).\n",
    "basic_rnn.weight_ih_l0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Parameter containing:\n",
       " 0.2065  0.3279 -0.0407 -0.1670  0.2792\n",
       "-0.2776 -0.3277  0.2683  0.0603  0.1309\n",
       "-0.3711  0.3196  0.4190  0.0326 -0.3182\n",
       "-0.0235 -0.0512  0.3191 -0.1126  0.0228\n",
       " 0.2597 -0.2444  0.3148 -0.0795 -0.1418\n",
       "[torch.FloatTensor of size 5x5]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Second-layer input-to-hidden weights $w_{ih}$; this layer is fed the first layer's\n",
    "# hidden state, so the size is (hidden_size, hidden_size) = (5, 5).\n",
    "basic_rnn.weight_ih_l1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Parameter containing:\n",
       " 0.2150  0.0983  0.0374  0.2345 -0.4368\n",
       " 0.4327 -0.0015  0.4188  0.0039 -0.2717\n",
       " 0.4181 -0.2944 -0.0375 -0.3815 -0.3615\n",
       " 0.1330 -0.4197 -0.3870 -0.2852  0.3714\n",
       " 0.0946 -0.0085 -0.4026 -0.1688  0.2727\n",
       "[torch.FloatTensor of size 5x5]"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# First-layer hidden-to-hidden weights $w_{hh}$; the size is (hidden_size, hidden_size) = (5, 5).\n",
    "# Layer suffixes are zero-based: weight_hh_l0 is the first layer, weight_hh_l1 the second.\n",
    "basic_rnn.weight_hh_l0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Parameter containing:\n",
       "-0.0978\n",
       " 0.2153\n",
       " 0.2239\n",
       " 0.2170\n",
       " 0.0324\n",
       "[torch.FloatTensor of size 5]"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# First-layer input-to-hidden bias $b_{ih}$; one entry per hidden unit (size 5).\n",
    "basic_rnn.bias_ih_l0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Toy input batch: sequence length 5, batch size 10, 3 features per step.\n",
    "toy_input = Variable(torch.randn(5, 10, 3))\n",
    "# Initial hidden state: num_layers * num_directions = 2, batch size 10, hidden size 5.\n",
    "h_0 = Variable(torch.randn(2, 10, 5))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "torch.Size([5, 10, 5])\n",
      "torch.Size([2, 10, 5])\n"
     ]
    }
   ],
   "source": [
    "# Run the whole toy sequence through the RNN, starting from h_0.\n",
    "toy_output, h_n = basic_rnn(toy_input, h_0)\n",
    "\n",
    "# Expected shapes: toy_output -> (5, 10, 5), h_n -> (2, 10, 5).\n",
    "print(toy_output.shape, h_n.shape, sep='\\n')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "---\n",
    "\n",
    "## 2. LSTM"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Two-layer LSTM with the same sizes as basic_rnn: 3 input features, 5 hidden units.\n",
    "lstm = nn.LSTM(\n",
    "    input_size=3,\n",
    "    hidden_size=5,\n",
    "    num_layers=2,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "LSTM中间比标准RNN多了三个线性变换，多的三个线性变换的权重拼在一起，所以一共是4倍，同理偏置也是4倍。\n",
    "换句话说，LSTM里面做了4个类似标准RNN所做的运算，所以参数个数是标准RNN的4倍。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Parameter containing:\n",
       "-0.0370  0.3933  0.4261\n",
       " 0.0104  0.2447  0.2633\n",
       "-0.3971  0.1586  0.2650\n",
       " 0.1254  0.0325 -0.0926\n",
       "-0.1548 -0.2009 -0.1171\n",
       "-0.1834  0.3234  0.0946\n",
       "-0.3815  0.0475  0.3884\n",
       "-0.2534 -0.2354  0.2124\n",
       "-0.4156 -0.1013 -0.3804\n",
       "-0.1959  0.2669  0.0208\n",
       "-0.1414 -0.0864 -0.0238\n",
       " 0.4439 -0.2000 -0.0276\n",
       "-0.3399  0.2919  0.0363\n",
       "-0.0171  0.1917  0.4374\n",
       " 0.3896  0.0809 -0.4040\n",
       "-0.3879 -0.3903  0.1277\n",
       " 0.1634  0.3729  0.1317\n",
       "-0.2193  0.1497  0.0905\n",
       " 0.1066 -0.2967  0.0568\n",
       "-0.2763  0.0103  0.3772\n",
       "[torch.FloatTensor of size 20x3]"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# The four stacked linear transforms make the weight size\n",
    "# (4 * hidden_size, input_size) = (4 * 5, 3) = (20, 3).\n",
    "lstm.weight_ih_l0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Feed the same toy_input of size (5, 10, 3); no explicit initial state is passed.\n",
    "# Note that this rebinds h_n, which previously held the RNN's final hidden state.\n",
    "lstm_out, (h_n, c_n) = lstm(toy_input)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "lstm_out_size =  torch.Size([5, 10, 5])\n",
      "h_n_size =  torch.Size([2, 10, 5])\n",
      "c_n_size =  torch.Size([2, 10, 5])\n"
     ]
    }
   ],
   "source": [
    "# The LSTM output sequence should have size (5, 10, 5).\n",
    "print('lstm_out_size = ', lstm_out.size())\n",
    "# The final hidden state h_n and cell state c_n should both have size (2, 10, 5).\n",
    "print('h_n_size = ', h_n.size())\n",
    "print('c_n_size = ', c_n.size())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "---\n",
    "\n",
    "## 3. GRU\n",
    "1. GRU的参数数量为标准RNN的3倍（重置门、更新门和候选状态各对应一组线性变换，权重拼在一起，例如 `weight_ih_l0` 的大小为 `(3 * hidden_size, input_size)`）；\n",
    "2. 网络的隐藏状态不是 $h_0 \\text{和} c_0$，而是只有 $h_0$；\n",
    "3. 其余部分和LSTM相同；"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "---\n",
    "\n",
    "## 4. 单步版本\n",
    "PyTorch 提供 `RNNCell`、`LSTMCell`、`GRUCell` 分别作为这三个模型的单步版本。\n",
    "\n",
    "它们的输入不再是一个序列，而是一个序列中的一步，也就是循环神经网络的一个循环。\n",
    "\n",
    "单步版本在序列的应用上更加灵活，能在基础上添加更多的自定义操作。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
